SF3B1 splicing analysis

Splicing maps

Author
Affiliation
Dr. Mirko Brueggemann

Buchman Institute for Molecular Life Sciences

Published

September 14, 2023

1 Analysis description

This report holds all analysis and plots used to create main figure 5 A-C with and all related supplementary figures.

1.1 Load libraries

Show code
library(knitr)
library(grid)
library(gridGraphics)
library(gridExtra)
library(scales)
library(reshape2)
library(ggplot2)
library(rtracklayer)
library(GenomicFeatures)
library(GenomicAlignments)
library(viridis)
library(tibble)
library(dplyr)
library(tidyr)
library(ComplexHeatmap)
library(kableExtra)
library(GenomicRanges)
library(BindingSiteFinder)
library(readr)
library(circlize)
library(patchwork)
library(ggsci)
library(ggbeeswarm)
library(zoo)
Show code
source("../styles.R")
source("../helper.R")

2 Significant splicing events

Show code
# import table and format
table3As <- read_csv("~/Projects/sf3b1/03_Splicing/results202212/golong_all_2022_12_diff_betabinomial_less_strict_3AS_all.csv")
standChrs = c(paste0("chr", seq(1:22)), "chrX", "chrY")

table3As = table3As %>% 
  as.data.frame() %>%
  select(gene, gene_id, chrom, strand, start, alternative, end, padj, all_mut_PSI, all_wt_PSI, total_PSI) %>%
  rownames_to_column() %>%
  subset(chrom %in% standChrs) %>%
  mutate(end = end + 1) %>%
  mutate(alternative = ifelse(strand == "+", alternative + 1, alternative)) %>%
  mutate(deltaPSI = all_mut_PSI - all_wt_PSI) %>%
  mutate(sig = ifelse(padj < 0.1, TRUE, FALSE)) %>%
  mutate(distAltEnd = ifelse(strand == "+", (alternative - end)*-1, (start - alternative)*-1 ))

table3As$id = 1:nrow(table3As)

# make granges
rngEnd = GRanges(
  seqnames = table3As$chrom,
  ranges = IRanges(start = table3As$start,
                   end = table3As$end),
  gene = table3As$gene,
  geneID = table3As$gene_id,
  strand = table3As$strand,
  padj = table3As$padj,
  deltaPSI = table3As$deltaPSI,
  sig = table3As$sig,
  distAltEnd = table3As$distAltEnd,
  id = table3As$id,
  wtPSI = table3As$all_wt_PSI,
  mutPSI = table3As$all_mut_PSI
)
names(rngEnd) = c(1:length(rngEnd))

table3AsPlus = subset(table3As, strand == "+")
table3AsMinus = subset(table3As, strand == "-")

rngAltPlus = GRanges(
  seqnames = table3AsPlus$chrom,
  ranges = IRanges(start = table3AsPlus$start,
                   end = table3AsPlus$alternative),
  gene = table3AsPlus$gene,
  geneID = table3AsPlus$gene_id,
  strand = table3AsPlus$strand,
  padj = table3AsPlus$padj,
  deltaPSI = table3AsPlus$deltaPSI,
  sig = table3AsPlus$sig,
  distAltEnd = table3AsPlus$distAltEnd,
  id = table3AsPlus$id,
  wtPSI = table3AsPlus$all_wt_PSI,
  mutPSI = table3AsPlus$all_mut_PSI
)
rngAltMinus = GRanges(
  seqnames = table3AsMinus$chrom,
  ranges = IRanges(start = table3AsMinus$alternative,
                   end = table3AsMinus$end),
  gene = table3AsMinus$gene,
  geneID = table3AsMinus$gene_id,
  strand = table3AsMinus$strand,
  padj = table3AsMinus$padj,
  deltaPSI = table3AsMinus$deltaPSI,
  sig = table3AsMinus$sig,
  distAltEnd = table3AsMinus$distAltEnd,
  id = table3AsMinus$id,
  wtPSI = table3AsMinus$all_wt_PSI,
  mutPSI = table3AsMinus$all_mut_PSI
)

rngAlt = c(rngAltPlus, rngAltMinus)
rngAlt = rngAlt[order(rngAlt$id)]
names(rngAlt) = c(1:length(rngAlt))
saveRDS(rngEnd, file = "./data/rngEnd.rds")
saveRDS(rngAlt, file = "./data/rngAlt.rds")
export(rngEnd, con = "./data/rngEnd.bed", format = "BED")
export(rngAlt, con = "./data/rngAlt.bed", format = "BED")
Show code
df = rngEnd %>% 
    as.data.frame() %>%
    mutate(region = ifelse(distAltEnd >= 12 & distAltEnd <= 21, "Affected", "Not Affected"))

ggplot(df, aes(x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
    geom_boxplot(outlier.size = 0.1, lwd = 0.5) +
    xlim(0,200) +
    theme_pub() +
    scale_fill_npg() +
    theme(legend.position = "top") +
    labs(
        x = "Distance between splice sites [nt]",
        y = "Adjusted P value",
        fill = "Region type"
    )

Significance of alternative 3’ splicing events by distance between canonical and alternative splicing pairs.

Show code
ggsave("splicingEvents_200.pdf", width = 10, height = 4, device = "pdf")
Show code
df = rngEnd %>% 
    as.data.frame() %>%
    mutate(region = ifelse(distAltEnd >= 12 & distAltEnd <= 21, "Affected", "Not Affected"))

ggplot(df, aes(x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
    geom_boxplot(outlier.size = 0.1, lwd = 0.5) +
    xlim(0,100) +
    theme_pub() +
    scale_fill_npg() +
    theme(legend.position = "top") +
    labs(
        x = "Distance between splice sites [nt]",
        y = "Adjusted P value",
        fill = "Region type"
    )

Significance of alternative 3’ splicing events by distance between canonical and alternative splicing pairs.

Show code
ggsave("splicingEvents_100.pdf", width = 10, height = 4, device = "pdf")

2.1 Zoom-in version

Show code
ggplot(df, aes(x = distAltEnd, y = padj, group = distAltEnd, fill = region)) +
    geom_boxplot(outlier.size = 0.1, lwd = 0.5) +
    xlim(0,50) +
    theme_pub() +
    scale_fill_npg() +
    theme(legend.position = "top") +
    labs(
        x = "Distance between splice sites [nt]",
        y = "Adjusted P value",
        fill = "Region type"
    )

Show code
ggsave("splicingEvents_50.pdf", width = 4, height = 4, device = "pdf")
Show code
dfN = df %>% 
    filter(sig == TRUE) %>%
    group_by(distAltEnd) %>%
    summarise(n = n()) %>%
    mutate(region = ifelse(distAltEnd >= 12 & distAltEnd <= 21, "Affected", "Not Affected"))

ggplot(dfN, aes(x = distAltEnd, y = n, group = distAltEnd, fill = region)) +
  geom_col(width = 0.9) +
  xlim(0,50) +
  theme_pub() +
    scale_fill_npg() +
    theme(legend.position = "top") +
    labs(
        x = "Distance between splice sites [nt]",
        y = "#N [Significant]",
        fill = "Region type"
    )

Show code
ggsave("splicingEvents_hist_50.pdf", width = 4, height = 4, device = "pdf")

3 Splicing meta profile maps

Show code
# Load clip data SF3B1 WT
clipFilesWt = "/Users/mirko/Projects/sf3b1/01_data_subsamp/wt/cov/replicate"
clipFiles = c(clipFilesWt)
clipFiles = list.files(clipFiles, pattern = ".bw$", full.names = TRUE)
clipFilesP = clipFiles[grep(clipFiles, pattern = "Plus")]
clipFilesM = clipFiles[grep(clipFiles, pattern = "Minus")]
# Organize clip data in dataframe
colData = data.frame(
  id = 1:3,
  condition = factor(c("WT", "WT", "WT")),
  clPlus = clipFilesP,
  clMinus = clipFilesM)
bdsSF3B1_WT = BSFDataSetFromBigWig(ranges = rngEnd, meta = colData)

# Load clip data SF3B1 MUT
clipFilesMut = "/Users/mirko/Projects/sf3b1/01_data_subsamp/mut/cov/replicate"
clipFiles = c(clipFilesMut)
clipFiles = list.files(clipFiles, pattern = ".bw$", full.names = TRUE)
clipFilesP = clipFiles[grep(clipFiles, pattern = "Plus")]
clipFilesM = clipFiles[grep(clipFiles, pattern = "Minus")]
# Organize clip data in dataframe
colData = data.frame(
  id = 1:2,
  condition = factor(c("MUT", "MUT")),
  clPlus = clipFilesP,
  clMinus = clipFilesM)
bdsSF3B1_MUT = BSFDataSetFromBigWig(ranges = rngEnd, meta = colData)

# Load clip data U2AF65 WT
clipFilesWt = "/Users/mirko/Projects/BindingSiteStrength/01_data/05_U2AF65_cellLines/K562/cov/replicates/"
clipFiles = c(clipFilesWt)
clipFiles = list.files(clipFiles, pattern = ".bw$", full.names = TRUE)
clipFilesP = clipFiles[grep(clipFiles, pattern = "plus")]
clipFilesM = clipFiles[grep(clipFiles, pattern = "minus")]
# Organize clip data in dataframe
colData = data.frame(
  id = 1:4,
  condition = factor(rep("WT",4)),
  clPlus = clipFilesP,
  clMinus = clipFilesM)
# creating objects and importing clip siganl
bdsU2AF2_WT = BSFDataSetFromBigWig(ranges = rngEnd, meta = colData)

# Load eclip data SF3B1 WT
clipFilesWt = "/Users/mirko/Projects/sf3b1/04_eCLIP/sf3b1/"
clipFiles = c(clipFilesWt)
clipFiles = list.files(clipFiles, pattern = ".bw$", full.names = TRUE)
clipFilesP = clipFiles[grep(clipFiles, pattern = "plus")]
clipFilesM = clipFiles[grep(clipFiles, pattern = "minus")]
# Organize clip data in dataframe
colData = data.frame(
  id = 1:2,
  condition = factor(rep("WT",2)),
  clPlus = clipFilesP,
  clMinus = clipFilesM)
# creating objects and importing clip siganl
bdsSF3B1_ECLIP = BSFDataSetFromBigWig(ranges = rngEnd, meta = colData)

clipData = list(bdsSF3B1_WT = bdsSF3B1_WT, 
                bdsSF3B1_MUT = bdsSF3B1_MUT, 
                bdsU2AF2_WT = bdsU2AF2_WT,
                bdsSF3B1_ECLIP = bdsSF3B1_ECLIP)

For meta profiles iCLIP crosslinks are aligned at the canonical and alternative splice sites, in a symmetrical 200nt wide window. From all events (#N=7,951), only those with a distance from 0-100nt are shown (#N=4,478). Note that for each plot all events with zero crosslinks are also removed. The final number of ranges used is indicated above each plot.

3.1 Single heatmaps

Show code
makeHmMatrixList <- function(clipSet, plot.topLim.size = 1){
    ### --------------------------------------------------------------------------
    ### Ranges
    ### --------------------------------------------------------------------------
    # select ranges
    # -> select within 100nt distance
    rngEndCurr = rngEnd
    rngAltCurr = rngAlt
    rngEndCurr = rngEnd[rngEnd$distAltEnd <= 100]
    rngAltCurr = rngAlt[rngAlt$distAltEnd <= 100]
    
    # shuffle ranges around
    # -> to avoid auto sorting by P value
    set.seed(1234)
    reorderIdx = sample(1:length(rngEndCurr))
    rngEndCurr = rngEndCurr[reorderIdx,]
    rngAltCurr = rngAltCurr[reorderIdx,]
    
    
    ### --------------------------------------------------------------------------
    ### Coverage
    ### --------------------------------------------------------------------------
    # coverage 3'AS outer position
    bdsSel = setRanges(clipSet, resize(rngEndCurr, fix = "end", width = 1) + 200)
    cov1 = coverageOverRanges(bdsSel, returnOptions = "merge_all_replicates", method = "mean")
    m1 = cov1
    # coverage 3'AS inner position
    bdsSel = setRanges(clipSet, resize(rngAltCurr, fix = "end", width = 1) + 200)
    cov2 = coverageOverRanges(bdsSel, returnOptions = "merge_all_replicates", method = "mean")
    m2 = cov2
    # sorting by distance to 3'AS
    distIdx = order(rngEndCurr$distAltEnd, decreasing = FALSE)
    m1 = m1[distIdx,]
    m2 = m2[distIdx,]
    
    # remove rows with no signal
    m1 = m1[rowSums(m1) > 0,]
    m2 = m2[rowSums(m2) > 0,]
    
    # scale values in matrix for plotting
    m1 = minMaxNorm(m1)
    m2 = minMaxNorm(m2)
    m1 = t(apply(m1, 1, smoothing, lambda=0.05, dim=401))
    m2 = t(apply(m2, 1, smoothing, lambda=0.05, dim=401))
    
    # combine matrices
    keep = intersect(rownames(m1), rownames(m2))
    m1 = m1[rownames(m1) %in% keep,]
    m2 = m2[rownames(m2) %in% keep,]
    
    
    ### --------------------------------------------------------------------------
    ### Annotations
    ### --------------------------------------------------------------------------
    # set reference range for annotations
    range = rngEndCurr
    currM = m1
    # annotate with distance between alternative and constitutive 3'ss
    dfDist = data.frame(dist = range$distAltEnd[as.numeric(rownames(currM))])
    dfDist$dist[dfDist$dist > 100] = 100
    haDist = rowAnnotation(
        dist = anno_points(dfDist, size = unit(2, "points"), gp = gpar(col = "#595959")),
        width = unit(1, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    # annotate with dPSI/ PSI values
    df = data.frame(deltaPSI = range$deltaPSI[as.numeric(rownames(currM))])
    haDeltaPSI = rowAnnotation(
        deltaPSI = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#4c435a", alpha = 1), ylim = c(-1,1) ), 
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    df = data.frame(psiWT = range$wtPSI[as.numeric(rownames(currM))])
    haWtPSI = rowAnnotation(
        psiWT = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#0086b3", alpha = 0.5)), # , ylim = c(-1,0)
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    df = data.frame(psiMUT = range$mutPSI[as.numeric(rownames(currM))])
    haMutPSI = rowAnnotation(
        psiMUT = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#990000", alpha = 0.5)), # , ylim = c(-1,0)
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    # annotate with significant results
    sig = range$sig[as.numeric(rownames(currM))]
    haSig = rowAnnotation(sigRes = sig, col = list(sigRes = c("FALSE" = "white", "TRUE" = "black")),
                          gp = gpar(lwd = 0.1),
                          show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6),
                          annotation_legend_param = list(sigRes = list(nrow = 1, direction = "horizontal"))
    )
    # split by range
    split = factor(ifelse(dfDist$dist >= 12 & dfDist$dist <= 21, "2",
                          ifelse(dfDist$dist <= 12 & dfDist$dist <= 21, "1", "3")),
                   levels = c("1", "2", "3"))
    
    ### --------------------------------------------------------------------------
    ### Plotting
    ### --------------------------------------------------------------------------
    
    # use raw-data for barplot on top
    mm1 = cov1[rownames(cov1) %in% rownames(m1),]
    mm2 = cov2[rownames(cov2) %in% rownames(m2),]
    mm1 = mm1[,c(30:250)]
    mm2 = mm2[,c(100:320)]
    
    # formatting of matrix outline
    rownames(m1) = NULL
    colnames(m1) = -200:200
    colnames(m1)[as.numeric(colnames(m1)) %% 100 != 0] = ""
    rownames(m2) = NULL
    colnames(m2) = -200:200
    colnames(m2)[as.numeric(colnames(m2)) %% 100 != 0] = ""
    
    # change heatmap plotting frame
    m1 = m1[,c(30:250)]
    m2 = m2[,c(100:320)]
    
    # annotate with top col means profile
    df1 = data.frame(sums = colMeans(mm1))
    haMeans1 = HeatmapAnnotation(cov = anno_barplot(df1, gp = gpar(fill = "#595959", col = "#595959"), ylim = c(0, plot.topLim.size)),
                                 height = unit(2, "cm"), show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6))
    df2 = data.frame(sums = colMeans(mm2))
    haMeans2 = HeatmapAnnotation(cov = anno_barplot(df2, gp = gpar(fill = "#595959", col = "#595959"), ylim = c(0, plot.topLim.size)),
                                 height = unit(2, "cm"), show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6))
    
    # color scale
    custom.col = colorRamp2(c(0,0.05,0.1,0.15,0.2,0.25), viridis(6, option = "mako", direction = -1))
    
    # plot heatmap
    h1 = Heatmap(m1,
                 column_title = "3'AS outer", name = "iCLIP signal",
                 cluster_rows = FALSE, cluster_columns = FALSE,
                 show_column_names = TRUE, show_row_names = FALSE,
                 col = custom.col,
                 row_split = split, row_gap = unit(0.1, "cm"),
                 column_names_gp = gpar(fontsize = 8),
                 top_annotation = haMeans1,
                 right_annotation = c(haSig, haDeltaPSI, haWtPSI, haMutPSI),
                 border = TRUE,
                 use_raster = TRUE, raster_by_magick = TRUE,
                 raster_magick_filter = "Spline",
                 raster_quality = 1, raster_resize_mat = mean,
                 heatmap_legend_param = list(
                     legend_width = unit(6, "cm"),
                     title_position = "topleft", direction = "horizontal"
                 )
    )
    h2 = Heatmap(m2,
                 column_title = "3'AS inner", name = "iCLIP signal",
                 cluster_rows = FALSE, cluster_columns = FALSE,
                 show_column_names = TRUE, show_row_names = FALSE,
                 col = custom.col,
                 row_split = split, row_gap = unit(0.1, "cm"),
                 top_annotation = haMeans2,
                 left_annotation = c(haDist),
                 column_names_gp = gpar(fontsize = 8),
                 border = TRUE,
                 use_raster = TRUE, raster_by_magick = TRUE,
                 raster_magick_filter = "Spline",
                 raster_quality = 1, raster_resize_mat = mean,
                 heatmap_legend_param = list(
                     legend_width = unit(6, "cm"),
                     title_position = "topleft", direction = "horizontal"
                 )
    )
    plotList = h2 + h1
    matDim = dim(m1)
    l = list(plotList = plotList, matDim = matDim)
    return(l)
}
Show code
p = makeHmMatrixList(clipSet = clipData$bdsSF3B1_WT, plot.topLim.size = 1)
draw(p$plotList, ht_gap = unit(0.2, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom",
     column_title = paste0("SF3B1-WT (#N:", format(p$matDim[1], big.mark = ","), ")"))

Show code
p = makeHmMatrixList(clipSet = clipData$bdsSF3B1_MUT, plot.topLim.size = 1)
draw(p$plotList, ht_gap = unit(0.2, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom",
     column_title = paste0("SF3B1-MUT (#N:", format(p$matDim[1], big.mark = ","), ")"))

Show code
p = makeHmMatrixList(clipSet = clipData$bdsU2AF2_WT, plot.topLim.size = 3)
draw(p$plotList, ht_gap = unit(0.2, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom",
     column_title = paste0("U2AF2-WT (#N:", format(p$matDim[1], big.mark = ","), ")"))

Show code
p = makeHmMatrixList(clipSet = clipData$bdsSF3B1_ECLIP, plot.topLim.size = 0.05)
draw(p$plotList, ht_gap = unit(0.2, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom",
     column_title = paste0("SF3B1-eCLIP (#N:", format(p$matDim[1], big.mark = ","), ")"))

3.2 Combined heatmaps

Show code
plotHeatmaps <- function(plot.topLim.size = 1){
    ### --------------------------------------------------------------------------
    ### Ranges
    ### --------------------------------------------------------------------------
    rngEndCurr = rngEnd
    # shuffle ranges around
    # -> to avoid auto sorting by P value
    set.seed(1234)
    reorderIdx = sample(1:length(rngEndCurr))
    rngEndCurr = rngEndCurr[reorderIdx,]
    
    # cut ranges to window size
    rngEndCurr = rngEndCurr[rngEndCurr$distAltEnd <= 100]
    rngEndCurr = resize(rngEndCurr, fix = "end", width = 1) + 200
    
    ### --------------------------------------------------------------------------
    ### Coverage
    ### --------------------------------------------------------------------------
    
    # make cov ewt
    bds.wt = setRanges(bdsSF3B1_WT, rngEndCurr)
    cov.wt = coverageOverRanges(bds.wt, returnOptions = "merge_all_replicates", method = "mean")
    cov.wt = cov.wt[rowSums(cov.wt) > 0,]
    
    # make cov mut
    bds.mut = setRanges(bdsSF3B1_MUT, rngEndCurr)
    cov.mut = coverageOverRanges(bds.mut, returnOptions = "merge_all_replicates", method = "mean")
    cov.mut = cov.mut[rowSums(cov.mut) > 0,]
    
    # make cov u23
    bds.u2 = setRanges(bdsU2AF2_WT, rngEndCurr)
    cov.u2 = coverageOverRanges(bds.u2, returnOptions = "merge_all_replicates", method = "mean")
    cov.u2 = cov.u2[rowSums(cov.u2) > 0,]
    
    # combine matrices
    keep = intersect(rownames(cov.wt), rownames(cov.mut))
    cov.wt = cov.wt[rownames(cov.wt) %in% keep,]
    cov.mut = cov.mut[rownames(cov.mut) %in% keep,]
    
    keep = intersect(rownames(cov.wt), rownames(cov.u2))
    cov.wt = cov.wt[rownames(cov.wt) %in% keep,]
    cov.mut = cov.mut[rownames(cov.mut) %in% keep,]
    cov.u2 = cov.u2[rownames(cov.u2) %in% keep,]
    
    m1 = cov.wt
    m2 = cov.mut
    m3 = cov.u2
    
    # sorting by distance to 3'AS
    
    currDist = rngEndCurr$distAltEnd[as.numeric(rownames(cov.wt))]
    distIdx = order(currDist, decreasing = FALSE)
    m1 = m1[distIdx,]
    m2 = m2[distIdx,]
    m3 = m3[distIdx,]
    
    # scale values in matrix for plotting
    m1 = minMaxNorm(m1)
    m2 = minMaxNorm(m2)
    m3 = minMaxNorm(m3)
    m1 = t(apply(m1, 1, smoothing, lambda=0.05, dim=401))
    m2 = t(apply(m2, 1, smoothing, lambda=0.05, dim=401))
    m3 = t(apply(m3, 1, smoothing, lambda=0.05, dim=401))
    
    ### --------------------------------------------------------------------------
    ### Annotations
    ### --------------------------------------------------------------------------
    # set reference range for annotations
    range = rngEndCurr
    currM = m1
    # annotate with distance between alternative and constitutive 3'ss
    dfDist = data.frame(dist = range$distAltEnd[as.numeric(rownames(currM))])
    dfDist$dist[dfDist$dist > 100] = 100
    haDist = rowAnnotation(
        dist = anno_points(dfDist, size = unit(2, "points"), gp = gpar(col = "#595959")),
        width = unit(1, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    # annotate with dPSI/ PSI values
    df = data.frame(deltaPSI = range$deltaPSI[as.numeric(rownames(currM))])
    haDeltaPSI = rowAnnotation(
        deltaPSI = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#4c435a", alpha = 1), ylim = c(-1,1) ), 
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    df = data.frame(psiWT = range$wtPSI[as.numeric(rownames(currM))])
    haWtPSI = rowAnnotation(
        psiWT = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#0086b3", alpha = 0.5)), # , ylim = c(-1,0)
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    df = data.frame(psiMUT = range$mutPSI[as.numeric(rownames(currM))])
    haMutPSI = rowAnnotation(
        psiMUT = anno_points(df, size = unit(2, "points"), gp = gpar(col = "#990000", alpha = 0.5)), # , ylim = c(-1,0)
        width = unit(2, "cm"), annotation_name_gp = gpar(fontsize = 6)
    )
    # annotate with significant results
    sig = range$sig[as.numeric(rownames(currM))]
    haSig = rowAnnotation(sigRes = sig, col = list(sigRes = c("FALSE" = "white", "TRUE" = "black")),
                          gp = gpar(lwd = 0.1),
                          show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6),
                          annotation_legend_param = list(sigRes = list(nrow = 1, direction = "horizontal"))
    )
    # split by range
    split = factor(ifelse(dfDist$dist >= 12 & dfDist$dist <= 21, "2",
                          ifelse(dfDist$dist <= 12 & dfDist$dist <= 21, "1", "3")),
                   levels = c("1", "2", "3"))
    
    ### --------------------------------------------------------------------------
    ### Plotting
    ### --------------------------------------------------------------------------
    
    # use raw-data for barplot on top
    mm1 = cov.wt[,c(30:250)]
    mm2 = cov.mut[,c(30:250)]
    mm3 = cov.u2[,c(30:250)]
    
    # color scale
    # formatting of matrix outline
    rownames(m1) = NULL
    colnames(m1) = -200:200
    colnames(m1)[as.numeric(colnames(m1)) %% 50 != 0] = ""
    rownames(m2) = NULL
    colnames(m2) = -200:200
    colnames(m2)[as.numeric(colnames(m2)) %% 50 != 0] = ""
    rownames(m3) = NULL
    colnames(m3) = -200:200
    colnames(m3)[as.numeric(colnames(m3)) %% 50 != 0] = ""
    
    # change heatmap plotting frame
    m1 = m1[,c(30:250)]
    m2 = m2[,c(30:250)]
    m3 = m3[,c(30:250)]
    
    # annotate with top col means profile
    df1 = data.frame(sums = colMeans(mm1))
    haMeans1 = HeatmapAnnotation(cov = anno_barplot(df1, gp = gpar(fill = "#595959", col = "#595959"), ylim = c(0, plot.topLim.size)),
                                 height = unit(2, "cm"), show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6))
    df2 = data.frame(sums = colMeans(mm2))
    haMeans2 = HeatmapAnnotation(cov = anno_barplot(df2, gp = gpar(fill = "#595959", col = "#595959"), ylim = c(0, plot.topLim.size)),
                                 height = unit(2, "cm"), show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6))
    df3 = data.frame(sums = colMeans(mm3))
    haMeans3 = HeatmapAnnotation(cov = anno_barplot(df3, gp = gpar(fill = "#595959", col = "#595959"), ylim = c(0, plot.topLim.size)),
                                 height = unit(2, "cm"), show_annotation_name=F, annotation_name_gp = gpar(fontsize = 6))
    
    # color scale
    custom.col = colorRamp2(c(0,0.05,0.1,0.15,0.2,0.25), viridis(6, option = "mako", direction = -1))
    
    # plot heatmap
    h1 = Heatmap(m1,
                 column_title = "SF3B1 WT", name = "iCLIP signal",
                 cluster_rows = FALSE, cluster_columns = FALSE,
                 show_column_names = TRUE, show_row_names = FALSE,
                 col = custom.col,
                 row_split = split, row_gap = unit(0.1, "cm"),
                 column_names_gp = gpar(fontsize = 8),
                 top_annotation = haMeans1,
                 right_annotation = c(haSig, haDeltaPSI, haWtPSI, haMutPSI),
                 border = TRUE,
                 use_raster = TRUE, raster_by_magick = TRUE,
                 raster_magick_filter = "Spline",
                 raster_quality = 1, raster_resize_mat = mean,
                 heatmap_legend_param = list(
                     legend_width = unit(6, "cm"),
                     title_position = "topleft", direction = "horizontal"
                 )
    )
    h2 = Heatmap(m2,
                 column_title = "SF3B1 MUT", name = "iCLIP signal",
                 cluster_rows = FALSE, cluster_columns = FALSE,
                 show_column_names = TRUE, show_row_names = FALSE,
                 col = custom.col,
                 row_split = split, row_gap = unit(0.1, "cm"),
                 top_annotation = haMeans2,
                 column_names_gp = gpar(fontsize = 8),
                 border = TRUE,
                 use_raster = TRUE, raster_by_magick = TRUE,
                 raster_magick_filter = "Spline",
                 raster_quality = 1, raster_resize_mat = mean,
                 heatmap_legend_param = list(
                     legend_width = unit(6, "cm"),
                     title_position = "topleft", direction = "horizontal"
                 )
    )
    h3 = Heatmap(m3,
                 column_title = "U2AF2 WT", name = "iCLIP signal",
                 cluster_rows = FALSE, cluster_columns = FALSE,
                 show_column_names = TRUE, show_row_names = FALSE,
                 col = custom.col,
                 row_split = split, row_gap = unit(0.1, "cm"),
                 top_annotation = haMeans3,
                 column_names_gp = gpar(fontsize = 8),
                 border = TRUE,
                 use_raster = TRUE, raster_by_magick = TRUE,
                 raster_magick_filter = "Spline",
                 raster_quality = 1, raster_resize_mat = mean,
                 heatmap_legend_param = list(
                     legend_width = unit(6, "cm"),
                     title_position = "topleft", direction = "horizontal"
                 )
    )
    plotList = h3 + h2 + h1
    matDim = dim(m1)
    l = list(plotList = plotList, matDim = matDim)
    return(l)
}
Show code
p = plotHeatmaps(plot.topLim.size = 3)
draw(p$plotList, ht_gap = unit(0.2, "cm"), heatmap_legend_side = "bottom", annotation_legend_side = "bottom",
     column_title = paste0("Combined (#N:", format(p$matDim[1], big.mark = ","), ")"))

Splicing heatmaps of SF3B1 and U2AF2 at splice sites

4 PSI distance plots

The following plots show at which distances the canonical splice sites is preferred over the alternative and vice versa. For the zoom out version usage fraction per distance is computed as a rolling mean over 20nt. For the zoom in fractions are shown on a per nt level.

Show code
df = rngEnd %>% as.data.frame() %>%
    mutate(sel = ifelse(wtPSI < 0.5, "Low", "High")) %>%
    group_by(distAltEnd, sel) %>% 
    summarise(n = n(), .groups = "keep") %>% 
    pivot_wider(values_from = n, names_from = sel) %>%
    mutate(distAltEnd = as.integer(distAltEnd)) %>%
    mutate(High = ifelse(is.na(High), 0, High)) %>%
    mutate(Low = ifelse(is.na(Low), 0, Low)) %>%
    ungroup() %>% 
    mutate(rs_high = rollapply(data = High, width = 20, FUN = mean, align = "left", fill = NA)) %>%
    mutate(rs_low = rollapply(data = Low, width = 20, FUN = mean, align = "left", fill = NA)) %>%
    group_by(distAltEnd) %>% 
    summarise(High = sum(rs_high), Low = sum(rs_low), Sum = sum(rs_high, rs_low), .groups = "keep") %>%
    mutate(FracHigh = High/Sum, FracLow = Low/Sum) %>%
    select(distAltEnd, FracHigh, FracLow) %>% 
    pivot_longer(-distAltEnd) %>%
    mutate(bin = as.numeric(distAltEnd)) %>% 
    drop_na()

ggplot() +
    geom_line(data = df, aes(x = bin, y = value, color = name, group = name), size = 0.5) +
    geom_smooth(data = df, aes(x = bin, y = value, color = name, group = name),
                method = "loess", se = FALSE, size = 1, span = 0.1) +
    theme_pub() +
    scale_color_npg(name = "3' splice site usage", labels = c("3'SS (constitutive)", "*3'SS (alternative)")) +
    labs(
        x = "Splice site distance (running mean [nt])",
        y = "Usage fraction",
        color = "WT inclusion level"
    ) +
    theme(legend.position = "top", aspect.ratio = 1) +
    xlim(0,250) 

Show code
ggsave("psiDistance_zoom_out.pdf", width = 4, height = 4, device = "pdf")
Show code
df = rngEnd %>% as.data.frame() %>%
    mutate(sel = ifelse(wtPSI < 0.5, "Low", "High")) %>%
    group_by(distAltEnd, sel) %>% 
    summarise(n = n()) %>% 
    pivot_wider(values_from = n, names_from = sel) %>%
    mutate(distAltEnd = as.integer(distAltEnd)) %>%
    mutate(High = ifelse(is.na(High), 0, High)) %>%
    mutate(Low = ifelse(is.na(Low), 0, Low)) %>%
    group_by(distAltEnd) %>%
    summarise(High = sum(High), Low = sum(Low), Sum = sum(High, Low)) %>%
    mutate(FracHigh = High/Sum, FracLow = Low/Sum) %>%
    select(distAltEnd, FracHigh, FracLow) %>% 
    pivot_longer(-distAltEnd) %>%
    mutate(bin = as.numeric(distAltEnd))

dfN = rngEnd %>% as.data.frame() %>%
    select(distAltEnd, wtPSI) %>%
    mutate(bin = cut_width(distAltEnd, width = 10, boundary = 0)) %>%
    mutate(bin = as.numeric(bin)) %>%
    select(c(bin, wtPSI))

ggplot() +
    ggrastr::rasterise(geom_quasirandom(data = dfN, aes(x = bin, y = wtPSI), color = "#808080", size = 0.1, alpha = 0.5), dpi = 300) +
    geom_line(data = df, aes(x = bin, y = value, color = name, group = name), size = 0.8) +
    geom_smooth(data = df, aes(x = bin, y = value, color = name, group = name),
                method = "loess", se = FALSE, size = 1, span = 0.3) +
    theme_pub() +
    scale_color_npg(name = "3' splice usage", labels = c("3'SS (canonical)", "*3'SS (alternative)")) +
    labs(
        x = "Splice site distance [nt]",
        y = "Usage fraction",
        color = "WT inclusion level"
    ) +
    theme(legend.position = "top") +
    scale_y_continuous(
        breaks = c(seq(from = -0.2, to = 1.2, by = 0.2)),
        name = "Usage fraction",
        sec.axis = sec_axis(~., name = "WT-PSI distribution")
    ) +
    xlim(0,50)

Show code
ggsave("psiDistance_zoom_in.pdf", width = 4, height = 4, device = "pdf")
Show code
ggplot() +
    ggrastr::rasterise(geom_quasirandom(data = dfN, aes(x = bin, y = wtPSI), color = "#808080", size = 0.1, alpha = 0.5), dpi = 300) +
    geom_line(data = df, aes(x = bin, y = value, color = name, group = name), size = 0.8) +
    geom_smooth(data = df, aes(x = bin, y = value, color = name, group = name),
                method = "loess", se = FALSE, size = 1, span = 0.3) +
    theme_pub() +
    scale_color_npg(name = "3' splice usage", labels = c("3'SS (canonical)", "*3'SS (alternative)")) +
    labs(
        x = "Splice site distance [nt]",
        y = "Usage fraction",
        color = "WT inclusion level"
    ) +
    theme(legend.position = "top") +
    scale_y_continuous(
        breaks = c(seq(from = -0.2, to = 1.2, by = 0.2)),
        name = "Usage fraction",
        sec.axis = sec_axis(~., name = "WT-PSI distribution")
    ) +
    xlim(0,30)

Show code
ggsave("psiDistance_zoom_in2.pdf", width = 4, height = 4, device = "pdf")

5 Session Information

Show code
sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur ... 10.16

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats4    grid      stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] zoo_1.8-12                  ggbeeswarm_0.7.2           
 [3] ggsci_3.0.0                 patchwork_1.1.2            
 [5] circlize_0.4.15             readr_2.1.4                
 [7] BindingSiteFinder_1.7.8     kableExtra_1.3.4           
 [9] ComplexHeatmap_2.14.0       tidyr_1.3.0                
[11] dplyr_1.1.2                 tibble_3.2.1               
[13] viridis_0.6.3               viridisLite_0.4.2          
[15] GenomicAlignments_1.33.1    Rsamtools_2.13.4           
[17] Biostrings_2.65.6           XVector_0.37.1             
[19] SummarizedExperiment_1.27.3 MatrixGenerics_1.9.1       
[21] matrixStats_1.0.0           GenomicFeatures_1.49.7     
[23] AnnotationDbi_1.59.1        Biobase_2.57.1             
[25] rtracklayer_1.57.0          GenomicRanges_1.49.1       
[27] GenomeInfoDb_1.33.10        IRanges_2.31.2             
[29] S4Vectors_0.35.4            BiocGenerics_0.43.4        
[31] ggplot2_3.4.2               reshape2_1.4.4             
[33] scales_1.2.1                gridExtra_2.3              
[35] gridGraphics_0.5-1          knitr_1.43                 

loaded via a namespace (and not attached):
  [1] BiocFileCache_2.5.2    systemfonts_1.0.4      plyr_1.8.8            
  [4] splines_4.2.1          BiocParallel_1.31.13   digest_0.6.31         
  [7] foreach_1.5.2          htmltools_0.5.5        fansi_1.0.4           
 [10] magrittr_2.0.3         memoise_2.0.1          cluster_2.1.4         
 [13] doParallel_1.0.17      tzdb_0.4.0             vroom_1.6.3           
 [16] svglite_2.1.1          prettyunits_1.1.1      colorspace_2.1-0      
 [19] blob_1.2.4             rvest_1.0.3            rappdirs_0.3.3        
 [22] ggdist_3.3.0           textshaping_0.3.6      xfun_0.39             
 [25] crayon_1.5.2           RCurl_1.98-1.12        jsonlite_1.8.5        
 [28] iterators_1.0.14       glue_1.6.2             polyclip_1.10-4       
 [31] gtable_0.3.3           zlibbioc_1.43.0        webshot_0.5.4         
 [34] GetoptLong_1.0.5       DelayedArray_0.23.2    distributional_0.3.2  
 [37] shape_1.4.6            DBI_1.1.3              Rcpp_1.0.10           
 [40] progress_1.2.2         clue_0.3-64            bit_4.0.5             
 [43] htmlwidgets_1.6.2      httr_1.4.6             RColorBrewer_1.1-3    
 [46] pkgconfig_2.0.3        XML_3.99-0.14          farver_2.1.1          
 [49] dbplyr_2.3.2           utf8_1.2.3             tidyselect_1.2.0      
 [52] labeling_0.4.2         rlang_1.1.1            munsell_0.5.0         
 [55] tools_4.2.1            cachem_1.0.8           cli_3.6.1             
 [58] generics_0.1.3         RSQLite_2.3.1          evaluate_0.21         
 [61] stringr_1.5.0          fastmap_1.1.1          yaml_2.3.7            
 [64] ragg_1.2.5             bit64_4.0.5            purrr_1.0.1           
 [67] KEGGREST_1.37.3        nlme_3.1-162           ggrastr_1.0.2         
 [70] xml2_1.3.4             biomaRt_2.53.3         compiler_4.2.1        
 [73] rstudioapi_0.14        beeswarm_0.4.0         filelock_1.0.2        
 [76] curl_5.0.1             png_0.1-8              tweenr_2.0.2          
 [79] stringi_1.7.12         lattice_0.21-8         Matrix_1.5-4.1        
 [82] vctrs_0.6.3            pillar_1.9.0           lifecycle_1.0.3       
 [85] GlobalOptions_0.1.2    bitops_1.0-7           R6_2.5.1              
 [88] BiocIO_1.7.1           vipor_0.4.5            codetools_0.2-19      
 [91] MASS_7.3-60            rjson_0.2.21           withr_2.5.0           
 [94] GenomeInfoDbData_1.2.9 mgcv_1.8-42            parallel_4.2.1        
 [97] hms_1.1.3              rmarkdown_2.22         Cairo_1.6-0           
[100] ggforce_0.4.1          restfulr_0.0.15